
# A4c_price_index_agg_full_adj
#==============================================================================

# Description: This code computes the difference in the gains from trade between
# poor and rich on average on aggregate in the full model, when the largest categories
# are excluded

rm(list = ls())
options(warn=-1)

setwd("D:/data_replication")

library('data.table')
library('stargazer')
library('ggplot2')
library("ggrepel")  


# Import and format list of products for each country
#===============================================================================

product_id <- fread("estimation/2_product_list/product_id_declarant.csv")

pc8plus_i_k <- fread("estimation/4_demand_estimation/pc8plus_i_k.csv")
names(pc8plus_i_k) <- c("product_id", "V1", "k")
pc8plus_i_k <- pc8plus_i_k[ , .(product_id, k)]
pc8plus_i_k[ , product_id := as.integer(product_id)]

k_to_i_k <- fread("estimation/5_supply_side/k_to_i_k.csv")

product_id <- merge(product_id, pc8plus_i_k, by = "product_id", all = T)
product_id <- product_id[k != 0]
product_id <- product_id[ , .(declarant, k)]

product_id <- merge(product_id, k_to_i_k, by = "k", all = T)
product_id <- product_id[ , .(i_k, declarant)]

#===============================================================================


# Import and format product-specific price indexes
#===============================================================================

load('estimation/5_supply_side/data_PI_full_adj.RData')

data <- data[beta_price_20 < 0]
data <- data[beta_price_80 < 0]

data$P_true_sorted_20 <- data$P_true_sorted_20 * (1 - data$index_mc)
data$P_true_sorted_80 <- data$P_true_sorted_80 * (1 - data$index_mc)
data$P_counter_sorted_20 <- data$P_counter_sorted_20 * (1 - data$index_mc)
data$P_counter_sorted_80 <- data$P_counter_sorted_80 * (1 - data$index_mc)

data[ , u_beta_true_20 := log(P_true_sorted_20) - 1]                                     
data[ , u_beta_true_80 := log(P_true_sorted_80) - 1]                                    
data[ , u_beta_counter_20 := log(P_counter_sorted_20) - 1]
data[ , u_beta_counter_80 := log(P_counter_sorted_80) - 1]

# Create difference between poor and rich
#-------------------------------------------------------------------------------

data[ , diff := P_counter_sorted_20 / P_true_sorted_20 - P_counter_sorted_80 / P_true_sorted_80]

#===============================================================================


# Merge in Cobb-Douglas Weights
#===============================================================================

setnames(product_id, "declarant", "Declarant")
setnames(product_id, "i_k", "product")
product_id[ , index_i_k := 1]
data = merge(data, product_id, by = c("product", "Declarant"), all = T)
data <- data[index_i_k == 1]

w = read.csv("estimation/4_demand_estimation/4_cobb_douglas_weights/weights_i_k.csv")
colnames(w)[1:4] = c("product", "Year", "Quarter", "Declarant")
data = merge(data, w, by = c('product', 'Year', 'Quarter', 'Declarant'))
data = data[s_20_i_k != 0]
data = data[s_80_i_k != 0]

#===============================================================================


# Construct Price Index
#===============================================================================

# Remove extreme observations
#-------------------------------------------------------------------------------

data_PI = data
data_PI = data_PI[diff < 10 & diff > -10]
data_PI = data_PI[(is.na(u_beta_counter_80) | is.na(u_beta_counter_20) | is.na(u_beta_true_80) | is.na(u_beta_true_20)) == F]
data_PI = data_PI[(abs(u_beta_counter_80) < 1e-13 | abs(u_beta_counter_20) < 1e-13 | abs(u_beta_true_80) < 1e-13 | abs(u_beta_true_20) < 1e-13) == F]

data_PI[ , v_true_20 := beta_price_20 * log(P_true_sorted_20 / (exp(1) * gamma(1 - (1/beta_price_20))))]
data_PI[ , v_counter_20 := beta_price_20 * log(P_counter_sorted_20 / (exp(1) * gamma(1 - (1/beta_price_20))))]
data_PI[ , v_true_80 := beta_price_80 * log(P_true_sorted_80 / (exp(1) * gamma(1 - (1/beta_price_80))))]
data_PI[ , v_counter_80 := beta_price_80 * log(P_counter_sorted_80 / (exp(1) * gamma(1 - (1/beta_price_80))))]

data_PI = data_PI[(v_true_20 == Inf | v_counter_20 == Inf | v_true_80 == Inf | v_counter_80 == Inf) == F]
data_PI = data_PI[(v_true_20 == -Inf | v_counter_20 == -Inf | v_true_80 == -Inf | v_counter_80 == -Inf) == F]

# Remove largest categories and readjust weights
#-------------------------------------------------------------------------------

setkey(data_PI, s_80_i_k)
data_PI <- data_PI[, .SD[((s_80_i_k < quantile(s_80_i_k, probs = 0.99)) & (s_20_i_k < quantile(s_20_i_k, probs = 0.99)))], by = 'Declarant']

data_PI[ , sum_w := sum(s_20_i_k), by = c('Year', 'Quarter', 'Declarant')]
data_PI[ , s_20_i_k := s_20_i_k / sum_w]

data_PI[ , sum_w := sum(s_80_i_k), by = c('Year', 'Quarter', 'Declarant')]
data_PI[ , s_80_i_k := s_80_i_k / sum_w]

# Compute Price Index
#-------------------------------------------------------------------------------

data_PI <- data_PI[ , PI_1 := exp(1 - sum(s_20_i_k*log(s_20_i_k))), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_2 := prod(exp((s_20_i_k/beta_price_20)*v_true_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_20_i_k/beta_price_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_true_20 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_2 := prod(exp((s_80_i_k/beta_price_80)*v_true_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_80_i_k/beta_price_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_true_80 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_2 := prod(exp((s_20_i_k/beta_price_20)*v_counter_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_20_i_k/beta_price_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_counter_20 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_2 := prod(exp((s_80_i_k/beta_price_80)*v_counter_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_80_i_k/beta_price_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_counter_80 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , diff := PI_counter_20 / PI_true_20 - PI_counter_80 / PI_true_80]
data_PI <- data_PI[ , gft_20 := PI_counter_20 / PI_true_20]
data_PI <- data_PI[ , gft_80 := PI_counter_80 / PI_true_80]
data_PI <- data_PI[ , .SD[1], by = c('Year', 'Quarter', 'Declarant')]

#===============================================================================


# Run Regression, save, and plot
#===============================================================================

Reg_P_summary <- lm(diff ~ 0 + as.factor(Declarant), data = data_PI)
reg_gft_20 <- lm(gft_20 ~ 0 + as.factor(Declarant), data = data_PI)
reg_gft_80 <- lm(gft_80 ~ 0 + as.factor(Declarant), data = data_PI)

declarant <- as.matrix(Reg_P_summary$coefficients)
declarant <- gsub("as.factor\\(Declarant\\)","",rownames(declarant))
declarant <- as.numeric(declarant)
income_data <- fread(file = "estimation/5_supply_side/gdp_per_capita_2007.csv")

# Save Regression

output <- merge(income_data, data.table(declarant, Reg_P_summary$coefficients), by = "declarant")
colnames(output) <- c(colnames(output)[1:4], "FE_declarant")
write.csv(file = "estimation/5_supply_side/price_index/results/diff_PI_full_adj.csv", output)

output_gft_20 <- merge(income_data, data.table(declarant, reg_gft_20$coefficients), by = "declarant")
colnames(output_gft_20) <- c(colnames(output)[1:4], "gft_20")
output_gft_80 <- merge(income_data, data.table(declarant, reg_gft_80$coefficients), by = "declarant")
colnames(output_gft_80) <- c(colnames(output)[1:4], "gft_80")
output_gft_80$year = NULL
output_gft_80$country = NULL
output_gft_80$gdp_per_capita_declarant = NULL
output_gft <- merge(output_gft_20, output_gft_80, by = "declarant")
write.csv(file = "robustness/overall_PI/gft_full_adj_agg.csv", output_gft)

# Plot
#-------------------------------------------------------------------------------

output <- fread('estimation/5_supply_side/price_index/results/diff_PI_full_adj.csv')

iso3 <- fread('estimation/5_supply_side/ISO3_declarant_codes.csv')

output[ , gdp_log := log(gdp_per_capita_declarant)]
setkey(output, declarant)
setkey(iso3, declarant)

output <- output[iso3]
output <- output[is.na(country) == F]
output = output[declarant != 18] 
output = output[declarant != 46] 
output = output[declarant != 600] 

reg <- lm(FE_declarant ~ gdp_log, data = output)
summary(reg)
cor(output$FE_declarant, output$gdp_log)

plot_diff_full_adj <- ggplot(output, aes(x=gdp_log, y=FE_declarant)) +
  geom_text_repel(label=output$ISO3, size = 5) +
  theme(text = element_text(size=17)) +
  geom_smooth(method=lm) +
  labs(x = "GDP per capita (in logs)", y = expression(paste(Delta, "Welfare"["Poor"]," - ", Delta, "Welfare"["Rich"]))) 

ggsave("estimation/5_supply_side/price_index/results/plot_PI_full_adj_out.png", plot = plot_diff_full_adj)


